# Read the length-at-age data (from GitHub; the local-path read is kept
# commented out). NOTE(review): in the collapsed rendering the inline comment
# swallowed the area filter; reconstructed as excluding area "VN", which
# matches the 11 areas appearing later in this document.
d <- # read_csv(paste0(home, "/data/for-analysis/dat.csv")) |>
  read_csv("https://raw.githubusercontent.com/maxlindmark/perch-growth/master/data/for-analysis/dat.csv") |>
  filter(age_ring == "Y", # use only length-at-age by filtering on age_ring
         !area == "VN")
Rows: 338460 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): age_ring, area, gear, ID, sex, keep
dbl (6): length_mm, age_bc, age_catch, catch_year, cohort, final_length
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
filter: removed 36,279 rows (11%), 302,181 rows remaining
# Sample size by area and cohort
ns <- d |>
  group_by(cohort, area) |>
  summarise(n = n())
group_by: 2 grouping variables (cohort, area)
summarise: now 453 rows and 3 columns, one group variable remaining (cohort)
# Minimum number of observations per area and cohort:
# keep only area-cohort combinations with more than 100 rows
d$area_cohort <- as.factor(paste(d$area, d$cohort))

d <- d |>
  group_by(area_cohort) |>
  filter(n() > 100)
# Minimum number of observations per area, cohort, and age:
# keep only combinations with more than 10 rows
d$area_cohort_age <- as.factor(paste(d$area, d$cohort, d$age_bc))

d <- d |>
  group_by(area_cohort_age) |>
  filter(n() > 10)
# We can also consider removing individuals where the SE of k is larger than
# the estimate itself; flag them and visualize k with error bars, split by flag
IVBG |>
  # mutate(keep = ifelse(k > quantile(k_se, probs = 0.95), "N", "Y")) |>
  mutate(keep = ifelse(k < k_se, "N", "Y")) |>
  # filter(row_id < 10000) |>
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr, color = keep)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~keep, ncol = 1) +
  geom_errorbar(alpha = 0.5) +
  NULL
mutate: new variable 'keep' (character) with 2 unique values and 0% NA
# Add back cohort and area variables.
# The join key is now explicit (the original relied on the implicit
# natural join, which tidylog reported as joining by ID).
# NOTE(review): d is still grouped by area_cohort_age here, so select()
# also carries that grouping column into the join (matches the rendered
# tidylog output showing 2 added columns).
IVBG <- IVBG |>
  left_join(d |> select(ID, area_cohort), by = join_by(ID)) |>
  separate(area_cohort, into = c("area", "cohort"), remove = TRUE, sep = " ") |>
  mutate(cohort = as.numeric(cohort))
Joining with `by = join_by(ID)`
left_join: added 2 columns (area_cohort_age, area_cohort)
> rows only in x 0
> rows only in y ( 97,478)
> matched rows 198,323 (includes duplicates)
> =========
> rows total 198,323
mutate: converted 'cohort' from character to double (0 new NA)
# Summarise and save for sample size.
# BUG FIX: max_cohort previously computed min(cohort) (copy-paste error),
# so the reported cohort range collapsed to a single year per area.
sample_size <- IVBG |>
  group_by(area) |>
  summarise(
    n_cohort = length(unique(cohort)),
    min_cohort = min(cohort),
    max_cohort = max(cohort),
    n_individuals = length(unique(ID)),
    n_data_points = n()
  )
group_by: one grouping variable (area)
summarise: now 11 rows and 6 columns, ungrouped
write_csv(sample_size, paste0(home, "/output/sample_size.csv"))

# Compare how the means and quantiles differ depending on this filtering:
# drop fits without a SE, then keep rows where the SE is smaller than k
IVBG_filter <- IVBG |>
  drop_na(k_se) |>
  # filter(k_se < quantile(k_se, probs = 0.95)) |>
  filter(k_se < k)
group_by: 2 grouping variables (cohort, area)
summarize: now 343 rows and 6 columns, one group variable remaining (cohort)
ungroup: no grouping variables
# Compare median k by cohort with and without the SE-based filter.
# NOTE(review): VBG and VBG_filter are summarised in a chunk not shown here.
ggplot() +
  geom_line(data = VBG, aes(cohort, k_median, color = "All k's")) +
  geom_line(data = VBG_filter, aes(cohort, k_median, color = "Filtered")) +
  guides(fill = "none") +
  facet_wrap(~area)
# No difference at all really. We should probably just discuss that with this model, achieving biologically reasonable parameter values and a good fit to data are sometimes two different things. In our case, we just want a representative value of the growth (as in time to reach average maximum size in the population) that accounts for the entire growth trajectory of an individual, for each area and cohort.
BT 1996 has a very high k, so I’ll inspect it in more detail
Rows: 395 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): area, model
dbl (2): year, temp
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rename: renamed one variable (cohort)
# Attach predicted temperature (and model label) to the VBG summaries
VBG <- VBG |>
  left_join(pred_temp, by = c("area", "cohort"))
left_join: added 2 columns (temp, model)
> rows only in x 0
> rows only in y ( 52)
> matched rows 343
> =====
> rows total 343
# Save data for map-plot
cohort_sample_size <- IVBG |>
  group_by(area, cohort) |>
  summarise(n = n()) # individuals, not samples!
group_by: 2 grouping variables (area, cohort)
summarise: now 343 rows and 3 columns, one group variable remaining (area)
# Attach per-cohort sample sizes to the VBG summaries
VBG <- left_join(VBG, cohort_sample_size, by = c("cohort", "area"))
left_join: added one column (n)
> rows only in x 0
> rows only in y ( 0)
> matched rows 343
> =====
> rows total 343
write_csv(VBG, paste0(home, "/output/vbg.csv"))

# Calculate the plotting order (also used for map plot):
# areas sorted from warmest to coldest minimum temperature.
# NOTE(review): `order` shadows base::order() for the rest of the session;
# renaming would require touching every downstream use.
order <- VBG |>
  ungroup() |>
  group_by(area) |>
  summarise(min_temp = min(temp)) |>
  arrange(desc(min_temp))
ungroup: no grouping variables
group_by: one grouping variable (area)
summarise: now 11 rows and 2 columns, ungrouped
# Print the area ordering (warmest to coldest minimum temperature)
order
# A tibble: 11 × 2
area min_temp
<chr> <dbl>
1 SI_HA 7.83
2 TH 6.85
3 BS 6.22
4 BT 5.87
5 FM 5.87
6 SI_EK 5.49
7 MU 5.16
8 FB 5.04
9 HO 3.99
10 JM 3.37
11 RA 3.06
# Build the area color palette from RdYlBu; two extra colors are generated
# and the two brightest (middle) ones dropped
nareas <- length(unique(order$area)) + 2 # to skip the brightest colors
colors <- colorRampPalette(brewer.pal(name = "RdYlBu", n = 10))(nareas)[-c(7, 8)]
Plot VBGE fits
# Sample IDs per area and plot their data and VBGE fits.
# NOTE(review): the original comment said 50 IDs but the code samples
# n = 30 per area — confirm which is intended.
set.seed(4)
ids <- IVBG |>
  distinct(ID, .keep_all = TRUE) |>
  group_by(area) |>
  slice_sample(n = 30)
# Save the VB parameter figure
ggsave(paste0(home, "/figures/vb_pars.pdf"), width = 17, height = 17, units = "cm")
Fit Sharpe-Schoolfield model to K
By area
model <- "sharpeschoolhigh_1981"

# Get starting values on full dataset for Sharpe-Schoolfield model:
# the response ("rate") is the median VB growth coefficient
dat <- VBG |>
  select(k_median, temp, area) |>
  rename(rate = k_median)
# Parameter bounds and starting values from the pooled data
lower <- get_lower_lims(dat$temp, dat$rate, model_name = model)
upper <- get_upper_lims(dat$temp, dat$rate, model_name = model)
start <- get_start_vals(dat$temp, dat$rate, model_name = model)

# Sharpe-Schoolfield model fit to data for each area.
# Per-area predictions are collected in a preallocated list and bound once
# at the end (the original grew `preds` with rbind() inside the loop, which
# copies the accumulated frame on every iteration).
areas <- unique(dat$area)
pred_list <- vector("list", length(areas))

for (i in seq_along(areas)) {
  a <- areas[i]

  # Get data for this area
  dd <- dat |> filter(area == a)

  # Fit model (multistart NLS around the pooled starting values)
  fit <- nls_multstart(
    rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
    data = dd,
    iter = c(3, 3, 3, 3),
    start_lower = start * 0.5,
    start_upper = start * 2,
    lower = lower,
    upper = upper,
    supp_errors = "Y"
  )

  # Make predictions on new data spanning the area's observed temperatures
  new_data <- data.frame(temp = seq(min(dd$temp), max(dd$temp), length.out = 100))
  pred_list[[i]] <- augment(fit, newdata = new_data) |> mutate(area = a)
}

# Bind all per-area predictions; keep `pred` as the last area's predictions
# for backward compatibility with the original loop's side effect
preds <- data.frame(do.call(rbind, pred_list))
pred <- pred_list[[length(pred_list)]]
filter: removed 280 rows (82%), 63 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 305 rows (89%), 38 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 302 rows (88%), 41 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 308 rows (90%), 35 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 295 rows (86%), 48 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 316 rows (92%), 27 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 324 rows (94%), 19 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 309 rows (90%), 34 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 325 rows (95%), 18 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 328 rows (96%), 15 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
filter: removed 338 rows (99%), 5 rows remaining
mutate: new variable 'area' (character) with one unique value and 0% NA
All areas pooled
# One Sharpe-Schoolfield model fitted to all data (areas pooled)
fit_all <- nls_multstart(
  rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 8),
  data = dat,
  iter = c(3, 3, 3, 3),
  start_lower = start * 0.5,
  start_upper = start * 2,
  lower = lower,
  upper = upper,
  supp_errors = "Y"
)

# Make predictions on new data over the pooled temperature range
new_data_all <- data.frame(temp = seq(min(dat$temp), max(dat$temp), length.out = 100))
pred_all <- augment(fit_all, newdata = new_data_all) |>
  mutate(area = "all")
mutate: new variable 'area' (character) with one unique value and 0% NA
pivot_wider: reorganized (par, est) into (r_tref, e, eh, th) [was 4x2, now 1x4]
summarise: now one row and one column, ungrouped
# A tibble: 1 × 1
t_opt
<dbl>
1 6.53
Plot Sharpe Schoolfield fits
# Plot growth coefficients by year and area against mean SST:
# per-area Sharpe-Schoolfield curves (solid), pooled fit (dashed),
# raw k values as points. NOTE(review): area_attr comes from an unseen chunk.
p1 <- preds |>
  ggplot(aes(temp, .fitted, color = factor(area, order$area))) +
  geom_point(
    data = dat, aes(temp, rate, color = factor(area, order$area)),
    size = 1, alpha = 0.8
  ) +
  geom_line(linewidth = 1) +
  geom_line(
    data = pred_all, aes(temp, .fitted),
    linewidth = 1, inherit.aes = FALSE, linetype = 2
  ) +
  scale_color_manual(values = colors, name = "Area") +
  guides(color = guide_legend(
    nrow = 1, reverse = TRUE, title.position = "top", title.hjust = 0.5,
    override.aes = list(size = 1)
  )) +
  scale_x_continuous(breaks = seq(-5, 20, 1)) +
  labs(
    x = "Temperature (°C)",
    y = "von Bertalanffy growth coefficient (*k*)"
  ) +
  theme(
    axis.title.y = ggtext::element_markdown(),
    legend.position = "bottom",
    legend.direction = "horizontal"
  )

p1 + facet_wrap(~factor(area, levels = (arrange(area_attr, desc(lat)))$area))
# Same plot without area facets
p1
# Save the Sharpe-Schoolfield figure
ggsave(paste0(home, "/figures/sharpe_school.pdf"), width = 17, height = 17, units = "cm")
Can we fit a single Sharpe Schoolfield with area-specific parameters with brms?
# Again, here are the data we are fitting
# (the original repeated data/aes inside geom_point; ggplot() already
# supplies both, so the layer only needs the point size)
ggplot(dat, aes(temp, rate, color = factor(area, order$area))) +
  geom_point(size = 0.6)
# Which family to use? We can try a Gaussian or a Student-t first...
# After that, maybe gamma or lognormal, if we predict negative k's
hist(dat$rate)
# Add in fixed parameters
dat$bk <- 8.62e-05       # presumably Boltzmann's constant in eV/K — confirm units
dat$tref <- 8 + 273.15   # reference temperature (8 °C) in Kelvin

# We can use the nls() parameters as starting values
summary(fit_all)
Formula: rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th,
tref = 8)
Parameters:
Estimate Std. Error t value Pr(>|t|)
r_tref 0.5265 0.5818 0.905 0.3661
e 1.3179 1.1940 1.104 0.2705
eh 1.8364 0.7329 2.506 0.0127 *
th 6.5328 6.1852 1.056 0.2916
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.05265 on 339 degrees of freedom
Number of iterations to convergence: 33
Achieved convergence tolerance: 1.49e-08
# As a first guess, I use the nls estimate as the mean and a standard
# deviation close to it. We can also bound some of them at 0, since
# negative values shouldn't be possible.
# Rewritten to use rnorm's natural argument order — the original
# rnorm(0.4, 0.4, n = n) only worked because the named `n` freed the first
# two positional slots for mean and sd, which reads as a wrong call.
# Also: `n = 10000` used `=` for assignment; use `<-`.
n <- 10000
hist(rnorm(n, mean = 0.4, sd = 0.4), main = "rtref")
# Compare posterior expected predictions against a manual Sharpe-Schoolfield
# prediction built from the nls estimates.
# NOTE(review): `fit_global` is fitted in a chunk not shown here.
dat |>
  data_grid(temp = seq_range(temp, n = 51)) |>
  ungroup() |>
  mutate(
    bk = 8.62e-05,
    tref = 8 + 273.15
  ) |>
  add_epred_draws(fit_global) |>
  mutate(rtref = 0.4391, e = 1.4258, eh = 2.1066, th = 7.5369) |> # from summary(fit_all)
  mutate(
    man_pred = (rtref * exp(e / bk * (1 / tref - 1 / (temp + 273.15)))) /
      (1 + exp(eh / bk * (1 / (th + 273.15) - 1 / (temp + 273.15))))
  ) |>
  ggplot(aes(temp, y = .epred)) +
  stat_lineribbon(.width = c(0.95), alpha = 0.3, color = "black", fill = "black") +
  geom_line(aes(y = man_pred), color = "tomato3", linetype = 2) +
  geom_point(data = dat, aes(temp, rate, color = factor(area, order$area)), size = 0.6) +
  scale_color_manual(values = colors, name = "Area")
ungroup: no grouping variables
mutate: new variable 'bk' (double) with one unique value and 0% NA
new variable 'tref' (double) with one unique value and 0% NA
mutate (grouped): new variable 'rtref' (double) with one unique value and 0% NA
new variable 'e' (double) with one unique value and 0% NA
new variable 'eh' (double) with one unique value and 0% NA
new variable 'th' (double) with one unique value and 0% NA
mutate (grouped): new variable 'man_pred' (double) with 51 unique values and 0% NA
Ok, seems to work with priors and everything. They are roughly as broad as can be and still have a fit. Now fit the full model, with random area effects, more iterations and chains!
# Still good enough, might change some priors. Now look at random area effects!
# Gaussian model: non-linear Sharpe-Schoolfield formula with area-level
# random intercepts on all four parameters
fitb <- brm(
  bf(
    rate ~ (rtref * exp(e / bk * (1 / tref - 1 / (temp + 273.15)))) /
      (1 + exp(eh / bk * (1 / (th + 273.15) - 1 / (temp + 273.15)))),
    rtref + e + eh + th ~ 1 + (1 | area),
    nl = TRUE
  ),
  data = dat,
  cores = 4,
  chains = 4,
  iter = 4000,
  sample_prior = "yes",
  save_pars = save_pars(all = TRUE),
  seed = 9,
  prior = c(
    prior(normal(0.4, 0.4), nlpar = "rtref", lb = 0),
    prior(normal(1.4, 0.6), nlpar = "e"),
    prior(normal(2.1, 1.5), nlpar = "eh"),
    prior(normal(7.5, 4), nlpar = "th", lb = 0)
  ),
  control = list(adapt_delta = 0.95, max_treedepth = 12)
)
Compare fitted models based on ELPD. Preferred model in the first row!
# LOO for the Gaussian model (moment matching for high-pareto-k draws)
fitb_loo <- loo(fitb, moment_match = TRUE)
Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
Warning: Found 2 observations with a pareto_k > 0.7 in model 'fitb'. It is
recommended to set 'reloo = TRUE' in order to calculate the ELPD without the
assumption that these observations are negligible. This will refit the model 2
times to compute the ELPDs for the problematic observations directly.
# LOO for the Student-t model (moment matching for high-pareto-k draws)
fitbs_loo <- loo(fitbs, moment_match = TRUE)
Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
Warning: Found 1 observations with a pareto_k > 0.7 in model 'fitbs'. It is
recommended to set 'reloo = TRUE' in order to calculate the ELPD without the
assumption that these observations are negligible. This will refit the model 1
times to compute the ELPDs for the problematic observations directly.
# Compare models on ELPD; the preferred model is listed first
loo_compare(fitb_loo, fitbs_loo)
elpd_diff se_diff
fitbs 0.0 0.0
fitb -10.9 10.8
The student model is preferred. Inspect it!
# Check fit
summary(fitbs)
Family: student
Links: mu = identity; sigma = identity; nu = identity
Formula: rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15))))/(1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15))))
rtref ~ 1 + (1 | area)
e ~ 1 + (1 | area)
eh ~ 1 + (1 | area)
th ~ 1 + (1 | area)
Data: dat (Number of observations: 343)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Group-Level Effects:
~area (Number of levels: 11)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(rtref_Intercept) 0.06 0.04 0.00 0.16 1.00 1640 2267
sd(e_Intercept) 0.21 0.14 0.01 0.53 1.00 2193 2920
sd(eh_Intercept) 0.22 0.20 0.01 0.77 1.00 1768 2734
sd(th_Intercept) 0.72 0.62 0.03 2.30 1.00 1820 3322
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
rtref_Intercept 0.58 0.21 0.29 1.10 1.00 1705 2567
e_Intercept 1.14 0.41 0.42 1.97 1.00 1899 2678
eh_Intercept 1.63 0.44 0.71 2.42 1.00 2703 2948
th_Intercept 6.54 2.60 2.40 12.92 1.00 1666 2308
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.04 0.00 0.03 0.04 1.00 7536 6447
nu 10.49 4.47 5.19 21.66 1.00 8058 6057
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for the Student-t model's parameters
plot(fitbs)
# Posterior predictive check (density overlay; 10 draws by default)
pp_check(fitbs)
Using 10 posterior draws for ppc type 'dens_overlay' by default.
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 3 variables (e, eh, th)
mutate: new variable 'kb' (double) with one unique value and 0% NA
ungroup: no grouping variables
Warning: There was 1 warning in `.fun()`.
ℹ In argument: `t_opt = (eh * th)/(eh + kb * th * log((eh/e) - 1))`.
Caused by warning in `log()`:
! NaNs produced
mutate: new variable 't_opt' (double) with 7,871 unique values and 2% NA
# Make the main plot (conditional effect of temperature, with and without
# random effects). Predictions without random effects (re_formula = NA):
glob <- dat |>
  data_grid(temp = seq_range(temp, n = 100)) |>
  mutate(
    bk = 8.62e-05,
    tref = 8 + 273.15
  ) |>
  add_epred_draws(fitbs, re_formula = NA) |>
  ungroup()
mutate: new variable 'bk' (double) with one unique value and 0% NA
new variable 'tref' (double) with one unique value and 0% NA